// OpentTxl-C Version 11 garbage collector
// J.R. Cordy, Jan 2023

// Copyright 2023, James R. Cordy and others

// Permission is hereby granted, free of charge, to any person obtaining a copy of this software 
// and associated documentation files (the “Software”), to deal in the Software without restriction, 
// including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
// and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
// subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all copies 
// or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE 
// AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// The TXL treespace garbage collector
// Identifies and frees unused tree and kid nodes when transformation runs out

// Modification Log

// v11.0 Initial revision, adapted from OpenTxl 11.0

#define GARBAGE

// I/O, strings, memory allocation
#include "support.h"

// Global modules
#include "locale.h"
#include "limits.h"
#include "options.h"
#include "tokens.h"
#include "errors.h"
#include "trees.h"
#include "shared.h"
#include "charset.h"
#include "idents.h"
#include "symbols.h"
#include "rules.h"

// Parent module
#include "xform.h"

// Check interface consistency
#include "garbage.h"

static void garbageRecovery_markTree (const treePT t)
{
    // Mark as in use all subtree and kid nodes of the given tree
    // We mark as in use by negating kid trees (for kids) and the kidsKP link (for trees)
    // Empty trees are marked using NILMARK

    // Stack use limitation - to avoid recursion crashes
    checkstack ();

    if (t == nilTree) {
        return;
    }

    if ((tree_trees[t].kidsKP) > 0) {
        // not marked yet - mark the kids if any

        switch (tree_trees[t].kind) {

            case treeKind_order: case treeKind_repeat: case treeKind_list: case treeKind_generatelist: case treeKind_lookahead:
                {
                    for (kidPT k = tree_trees[t].kidsKP; k <= ((tree_trees[t].kidsKP) + (tree_trees[t].count)) - 1; k++) {
                        if (tree_kids[k] > 0) {
                            // kid unmarked so far
                            garbageRecovery_markTree (tree_kids[k]);
                            tree_setKidTree (k, -(tree_kids[k]));
                        } else if (tree_kids[k] == nilTree) {
                            tree_setKidTree (k, NILMARK);
                        }
                    }
                }
                break;

            case treeKind_choose: case treeKind_leftchoose: case treeKind_generaterepeat:
                {
                    kidPT k = tree_trees[t].kidsKP;
                    if (tree_kids[k] > 0) {
                        // kid unmarked so far
                        garbageRecovery_markTree (tree_kids[k]);
                        tree_setKidTree (k, -(tree_kids[k]));
                    } else if (tree_kids[k] == nilTree) {
                        tree_setKidTree (k, NILMARK);
                    }
                }
                break;

            case treeKind_expression: case treeKind_lastExpression: case treeKind_ruleCall:
                {
                    for (kidPT k = tree_trees[t].kidsKP; k <= maxKids; k++) {
                        if (tree_kids[k] == nilTree) {
                            tree_setKidTree (k, NILMARK);
                            break;
                        }
                        if (tree_kids[k] > 0) {
                            // kid unmarked so far
                            garbageRecovery_markTree (tree_kids[k]);
                            tree_setKidTree (k, -(tree_kids[k]));
                        } else if (tree_kids[k] == nilTree) {
                            tree_setKidTree (k, NILMARK);
                        }
                    }
                }
                break;

            default :
                // other kinds don't have kids
                break;
        }

        // mark this node
        tree_setKids (t, -(tree_trees[t].kidsKP));

    } else if (tree_trees[t].kidsKP == nilKid) {
        // mark this node
        tree_setKids (t, NILMARK);
    }
}

static void garbageRecovery_unmarkTrees (void) {
    // Unmark all tree nodes marked as in use by re-negating their kidsKP link

    // Since we don't disturb trees in the compiled TXL program, they are all still in use
    int treesUsed = transformer_lastCompileTree;

    for (treePT t = 1; t <= maxTrees; t++) {

        // If the tree node is marked as in use, restore its kids link by re-negating
        if (tree_trees[t].kidsKP < 0) {
            // this one is in use!
            if (t > transformer_lastCompileTree) {
                treesUsed += 1;
            }

            if (tree_trees[t].kidsKP == NILMARK) {
                tree_setKids (t, nilKid);
            } else {
                tree_setKids (t, -(tree_trees[t].kidsKP));
            }

        // Otherwise, we can free it unless it's in the compiled TXL program
        } else if (t > transformer_lastCompileTree) {
            tree_setKids (t, AVAILABLE);
            #ifdef CHECKED
                tree_setKind (t, treeKind_undefined);
                tree_setCount (t, 0);
                tree_setName (t, NOT_FOUND);
            #endif
        }
    }

    // Report stats if verbose
    if (options_option[verbose_p]) {
        fprintf (stderr, "--- %d trees were allocated, %d were in use\n", tree_treeCount, treesUsed);
    }

    tree_setTreeCount (treesUsed);
}

static void garbageRecovery_unmarkKids (void) {
    // Unmark all kid nodes marked as in use by re-negating their tree node link

    // Since we don't disturb trees and kids in the compiled TXL program, they are all still in use
    int kidsUsed = transformer_lastCompileKid;

    for (kidPT k = 1; k <= maxKids; k++) {
        // If the kid node is marked as in use, restore its tree link by re-negating
        if (tree_kids[k] < 0) {
            // this one is in use!
            if (k > transformer_lastCompileKid) {
                kidsUsed += 1;
            }
            if ((tree_kids[k]) == NILMARK) {
                tree_setKidTree (k, nilTree);
            } else {
                tree_setKidTree (k, -(tree_kids[k]));
            }

        // Otherwise, we can free it unless it's in the compiled TXL program
        } else if (k > transformer_lastCompileKid) {
            // not in use - mark it as available
            tree_setKidTree (k, AVAILABLE);
        }
    }

    if (options_option[verbose_p]) {
        fprintf (stderr, "--- %d kids were allocated, %d were in use\n", tree_kidCount, kidsUsed);
    }

    tree_setKidCount (kidsUsed);
}

#ifdef CHECKED
// Check the global integrity of the tree space after each garbage collection

static void garbageRecovery_garbageCollectionError (string *checkRule, string *checkContext)
{
    string context, message;
    stringprintf (context, "rule/function '%s'", (*checkRule));
    stringprintf (message, "Garbage collection failure, variable '%s'", (*checkContext));
    error(context, message, INTERNAL_FATAL, 921);
}

static string *garbageRecovery_checkContext;
static string *garbageRecovery_checkRule;

static void garbageRecovery_checkTree (const treePT t)
{
    // Stack use limitation - to avoid crashes
    checkstack ();

    if (t == nilTree) {
        return;
    }

    if ((tree_trees[t].kidsKP < 0) || (tree_trees[t].kidsKP == AVAILABLE)) {
        garbageRecovery_garbageCollectionError (garbageRecovery_checkRule, garbageRecovery_checkContext);
    }

    switch (tree_trees[t].kind) {
        case treeKind_order: case treeKind_repeat: case treeKind_list: case treeKind_generatelist: case treeKind_lookahead :
            {
                for (kidPT k = tree_trees[t].kidsKP; k <= (tree_trees[t].kidsKP + tree_trees[t].count) - 1; k++) {
                    garbageRecovery_checkTree (tree_kids[k]);
                }
            }
            break;
        case treeKind_choose: case treeKind_leftchoose: case treeKind_generaterepeat :
            {
                kidPT k = tree_trees[t].kidsKP;
                garbageRecovery_checkTree (tree_kids[k]);
            }
            break;
        case treeKind_expression: case treeKind_lastExpression: case treeKind_ruleCall :
            {
                for (kidPT k = tree_trees[t].kidsKP; k <= maxKids; k++) {
                    if (tree_kids[k] == nilTree) break;
                    garbageRecovery_checkTree (tree_kids[k]);
                }
            }
            break;
        default :
            // other kinds don't have kids
            break;
    }
}

static void garbageRecovery_checkActiveTrees (void) {
    // check the integrity of active trees

    // check top five rule call activation records and the main rule call (can't afford to check all in large code!)
    for (int cc = max (transformer_callDepth - 4, 1); cc <= transformer_callDepth + 1; cc++) {
        if (cc > 0) {
            int c = cc;
            if (c == transformer_callDepth + 1) {
                // do the main rule call
                c = 0;
            }

            garbageRecovery_checkRule = (string *) ident_idents[transformer_callEnvironment[c].name];
            garbageRecovery_checkContext = (string *) &("scope");
            garbageRecovery_checkTree (transformer_callEnvironment[c].scopeTP);

            struct ruleLocalsT *localVars = (transformer_callEnvironment[c].localsListAddr);
            for (int v = 1; v <= localVars->nlocals; v++) {
                garbageRecovery_checkContext = (string *) ident_idents[rule_ruleLocals[localVars->localsBase + v].name];
                garbageRecovery_checkTree (transformer_valueTP[transformer_callEnvironment[c].valuesBase + v]);
            }

            garbageRecovery_checkContext = (string *) &("result");
            garbageRecovery_checkTree (transformer_callEnvironment[c].resultTP);

            garbageRecovery_checkContext = (string *) &("newscope");
            garbageRecovery_checkTree (transformer_callEnvironment[c].newscopeTP);
        }
    }
}
#endif

void garbageRecovery_recoverGarbage (void) {
    // Attempt to recover garbage after the transformation runs out of treespace 

    // Strategy: 
    //  (1) We leave everything from the compiled TXL program alone.
    //      Thus we don't bugger up the ruleset.
    //  (2) We trace the scope and bound local variable trees of all active 
    //      rule call environments (including the present one), 
    //      marking all active trees and kids as in use by negating their links.
    //  (3) We zero all unmarked trees and kids to indicate they are free, 
    //      unmark all active trees and kids by re-negating their links,
    //      and change allocation strategy to scavenge free trees and kids.

    //  if we're still using linear allocation, mark all remaining free nodes and kids as free
    if (tree_allocationStrategy == simple) {
        for (treePT t = tree_treeCount + 1; t <= maxTrees; t++) {
            tree_setKind (t, treeKind_empty);
            tree_setKids (t, nilKid);
        }
        for (kidPT k = tree_kidCount + 1; k <= maxKids; k++) {
            tree_setKidTree (k, nilTree);
        }
    }

    // mark all trees and kids used in the ident table as active
    for (int id = 0; id <= maxIdents - 1; id++) {
        if (ident_identTree[id] != nilTree) {
            garbageRecovery_markTree (ident_identTree[id]);
        }
    }

    // mark all trees and kids used in the rule table as active
    // (possibly redundant? they should all be before lastCompileTree?? test using assertions -- JRC)
    for (int i = 1; i <= rule_nRules; i++) {
        struct ruleT *r = &(rule_rules[i]);

        // rules[*].patternTP, rules[*].replacementTP
        garbageRecovery_markTree (r->patternTP);
        garbageRecovery_markTree (r->replacementTP);

        // rules[*].prePattern[*].patternTP, rules[*].prePattern[*].replacementTP
        for (int p = 1; p <= r->prePattern.nparts; p++) {
            garbageRecovery_markTree (rule_ruleParts[r->prePattern.partsBase + p].patternTP);
            garbageRecovery_markTree (rule_ruleParts[r->prePattern.partsBase + p].replacementTP);
        }

        // rules[*].postPattern[*].patternTP, rules[*].postPattern[*].replacementTP
        for (int p = 1; p <= r->postPattern.nparts; p++) {
            garbageRecovery_markTree (rule_ruleParts[r->postPattern.partsBase + p].patternTP);
            garbageRecovery_markTree (rule_ruleParts[r->postPattern.partsBase + p].replacementTP);
        }
    }

    //  mark all trees and kids used in called rules we are in as active
    for (int c = 0; c <= transformer_callDepth; c++) {
        struct ruleLocalsT *localVars = (transformer_callEnvironment[c].localsListAddr);

        // calls[*].scopeTP
        garbageRecovery_markTree (transformer_callEnvironment[c].scopeTP);

        // calls[*].localVars[*].valueTP
        for (int v = 1; v <= localVars->nlocals; v++) {
            garbageRecovery_markTree (transformer_valueTP[transformer_callEnvironment[c].valuesBase + v]);
        }

        // calls[*].resultTP
        garbageRecovery_markTree (transformer_callEnvironment[c].resultTP);

        // calls[*].newscopeTP
        garbageRecovery_markTree (transformer_callEnvironment[c].newscopeTP);
    }

    // unmark active trees and kids, and nil all inactive trees and kids to indicate they are free
    garbageRecovery_unmarkTrees ();
    garbageRecovery_unmarkKids ();

    // check that we have enough space left to work in 
    if (((maxTrees - tree_treeCount) < (maxTrees / 10)) || ((maxKids - tree_kidCount) < (maxKids / 10))) {
        error ("", "Garbage recovery unable to recover enough space to continue (a larger size is required for this transform)", FATAL, 922);
    }

    // change allocation strategy to scavenge from free trees and kids
    tree_setAllocationStrategy (scavenge);

    #ifdef CHECKED 
        // Verify treespace integrity after garbage collection
        garbageRecovery_checkActiveTrees ();
    #endif
}
